library(survival)
library(tidyverse)
library(gtsummary)
library(purrr)
library(zoo)  # For na.locf (last observation carried forward)
library(MASS)
library(ncvreg)
library(devtools)
library(Cyclops)
library(BrokenAdaptiveRidge)
library(flextable)
library(gt)
library(PAmeasures)
library(pec)
library(survMisc)
data.path <- "/Users/jacenai/Desktop/BIOS_215/Project/AIDS/"
aids <- read.csv(file.path(data.path, "AIDS.data.csv")) %>%
  mutate(trt = ifelse(trt %in% c(0, 3), 0, 1)) %>%
  dplyr::select(-pidnum, -hemo, -zprior, -cd40, -cd420, -treat) 

EDA

Summary Statistics

aids.table <- read.csv(file.path(data.path, "AIDS.data.csv")) %>%
  mutate(trt = ifelse(trt %in% c(0, 3), 0, 1)) %>%
  dplyr::select(-pidnum, -hemo, -zprior, -cd420) %>%
  dplyr::select(-time, -cid) %>%
  dplyr::mutate(
    # cid = factor(cid, levels = c(0, 1), labels = c("Censored", "Failure")),
    trt = factor(trt, levels = c(0, 1), labels = c("Monotherapy", "Combination Therapy")),
    homo = factor(homo, levels = c(0, 1), labels = c("No.homosexual.activity", "homosexual.activity")),
    drugs = factor(drugs, levels = c(0, 1), labels = c("No.history.IV.drug.use", "history.IV.drug.use")),
    oprior = factor(oprior, levels = c(0, 1), labels = c("ZDV antiretroviral therapy pre-175", "Non-ZDV antiretroviral therapy pre-175")),
    z30 = factor(z30, levels = c(0 ,1), labels = c("No ZDV in the 30 days prior to 175", "Had ZDV in the 30 days prior to 175")),
    race = factor(race, levels = c(0, 1), labels = c("white", "non-white")),
    gender = factor(gender, levels = c(0, 1), labels = c("Female", "Male")),
    str2 = factor(str2, levels = c(0, 1), labels = c("no antiretroviral history", "had antiretroviral history")),
    symptom = factor(symptom, levels = c(0, 1), labels = c("asymptomatic", "symptomatic")),
    # treat = factor(treat, levels = c(0, 1), labels = c("ZDV only", "others")),
    offtrt = factor(offtrt, levels = c(0, 1), labels = c("no off-trt before 96±5 weeks ", "off-trt before 96±5 weeks")),
    karnof = factor(karnof, levels = c(70, 80, 90, 100), labels = c("70", "80", "90", "100")),
    strat = factor(strat, levels = c(1, 2, 3) , labels = c("Antiretroviral Naive", "> 1 but ≤ 52 weeks of prior antiretroviral therapy", "> 52 weeks"))
  ) 

# Separate continuous and categorical variables
continuous_vars <- aids.table %>% dplyr::select(where(is.numeric))  # Select only numeric (continuous) columns
categorical_vars <- aids.table %>% dplyr::select(where(is.factor))  # Select only categorical variables

# Recombine them in the desired order: Continuous first, then categorical
aids_reordered <- bind_cols(continuous_vars, categorical_vars)

# Run tbl_summary on the reordered dataset
aids_reordered %>%
  tbl_summary(
    by = trt,
    statistic = list(
      all_continuous() ~ "{mean} ± {sd}",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(
      all_continuous() ~ 1,  # Round continuous values to 2 decimals
      all_categorical() ~ c(0, 0)  # Round count to integer, percentage to 1 decimal
    ),
    missing = "no"  # Exclude missing category
  ) %>%
  bold_labels() %>%  # Bold variable names
  modify_header(label ~ "**Variables**") %>%  # Change label column name to "Variable"
  
  add_overall(
    statistic = list(
      all_continuous() ~ "{mean} ± {sd}",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = list(
      all_continuous() ~ 1,
      all_categorical() ~ c(0, 0)
    ),
  ) %>%
  
  # modify_table_body( fun = ~ .x %>% arrange(variable) ) %>% # arrange variables alphabetically
  modify_table_body(
    fun = ~ .x %>% arrange(match(variable, names(aids_reordered))) 
  ) %>%
  modify_header(label ~ "**Variable**") %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Treatment**")  
Variable Overall, N = 2,1391 Treatment
Monotherapy, N = 1,0931 Combination Therapy, N = 1,0461
age 35.2 ± 8.7 35.2 ± 8.7 35.3 ± 8.7
wtkg 75.1 ± 13.3 75.4 ± 13.1 74.8 ± 13.4
preanti 379.2 ± 468.7 379.3 ± 470.6 379.0 ± 466.8
treat 1,607 (75%) 561 (51%) 1,046 (100%)
cd40 350.5 ± 118.6 350.3 ± 114.2 350.8 ± 123.0
cd80 986.6 ± 480.2 979.4 ± 489.3 994.2 ± 470.6
cd820 935.4 ± 445.0 936.0 ± 465.3 934.7 ± 422.9
homo


    No.homosexual.activity 725 (34%) 373 (34%) 352 (34%)
    homosexual.activity 1,414 (66%) 720 (66%) 694 (66%)
drugs


    No.history.IV.drug.use 1,858 (87%) 961 (88%) 897 (86%)
    history.IV.drug.use 281 (13%) 132 (12%) 149 (14%)
karnof


    70 9 (0%) 6 (1%) 3 (0%)
    80 80 (4%) 40 (4%) 40 (4%)
    90 787 (37%) 418 (38%) 369 (35%)
    100 1,263 (59%) 629 (58%) 634 (61%)
oprior


    ZDV antiretroviral therapy pre-175 2,092 (98%) 1,068 (98%) 1,024 (98%)
    Non-ZDV antiretroviral therapy pre-175 47 (2%) 25 (2%) 22 (2%)
z30


    No ZDV in the 30 days prior to 175 962 (45%) 498 (46%) 464 (44%)
    Had ZDV in the 30 days prior to 175 1,177 (55%) 595 (54%) 582 (56%)
race


    white 1,522 (71%) 764 (70%) 758 (72%)
    non-white 617 (29%) 329 (30%) 288 (28%)
gender


    Female 368 (17%) 191 (17%) 177 (17%)
    Male 1,771 (83%) 902 (83%) 869 (83%)
str2


    no antiretroviral history 886 (41%) 461 (42%) 425 (41%)
    had antiretroviral history 1,253 (59%) 632 (58%) 621 (59%)
strat


    Antiretroviral Naive 886 (41%) 461 (42%) 425 (41%)
    > 1 but ≤ 52 weeks of prior antiretroviral therapy 410 (19%) 198 (18%) 212 (20%)
    > 52 weeks 843 (39%) 434 (40%) 409 (39%)
symptom


    asymptomatic 1,769 (83%) 908 (83%) 861 (82%)
    symptomatic 370 (17%) 185 (17%) 185 (18%)
offtrt


    no off-trt before 96±5 weeks 1,363 (64%) 693 (63%) 670 (64%)
    off-trt before 96±5 weeks 776 (36%) 400 (37%) 376 (36%)
1 Mean ± SD; n (%)
  # add_p() # Add p-values

Back to top

Kaplan-Meier Curves

Group by treatment and plot the Kaplan-Meier curves.

Based on the survival curves, the effect of combination therapy becomes more beneficial in the long term.

data_directory <- "https://raw.githubusercontent.com/erickawaguchi/BIOSm215/master/functions/" #For later use
library(km.ci)
## Warning: package 'km.ci' was built under R version 4.3.3
source(paste0(data_directory, "conf_bands.R"))
# KME of S(t) for treatment groups
aids.surv <- survfit(Surv(time, cid) ~ trt, data = aids)

plot(aids.surv, lty = 1:2, col = c('blue', 'red'), main = ' Est. Survival Function for Treatment Groups', ylab = 'Estimates Survival Function', xlab = 'Time', conf.int = FALSE)

legend('bottomleft', c('Monotherapy', 'Combination'), col = c('blue', 'red'), lty = 1:2, bty = 'n')

Group by ZDV antiretroviral therapy pre-175 (oprior) and plot the Kaplan-Meier curves.

plot(survfit(Surv(time, cid) ~ (oprior), data = aids, conf.type = 'none'), lty = 1:2, col = c('blue', 'red'), main = ' Est. Survival Function for Non-ZDV antiretroviral therapy pre-175', ylab = 'Estimates Survival Function', xlab = 'Time')

legend('bottomleft', c('No', 'Yes'), col = c('blue', 'red'), lty = 1:2, bty = 'n')

Group by ZDV in the 30 days prior to 175 (z30) and plot the Kaplan-Meier curves.

plot(survfit(Surv(time, cid) ~ (z30), data = aids, conf.type = 'none'), lty = 1:2, col = c('blue', 'red'), main = ' Est. Survival Function for ZDV prior to 175', ylab = 'Estimates Survival Function', xlab = 'Time')

legend('bottomleft', c('No', 'Yes'), col = c('blue', 'red'), lty = 1:2, bty = 'n')

Group by gender and plot the Kaplan-Meier curves.

plot(survfit(Surv(time, cid) ~ gender, data = aids, conf.type = 'none'), lty = 1:2, col = c('blue', 'red'), main = ' Est. Survival Function for Gender Groups', ylab = 'Estimates Survival Function', xlab = 'Time')

legend('bottomleft', c('Female', 'Male'), col = c('blue', 'red'), lty = 1:2, bty = 'n')

Group by homosexual activity (homo) and plot the Kaplan-Meier curves.

plot(survfit(Surv(time, cid) ~ homo, data = aids, conf.type = 'none'), lty = 1:2, col = c('blue', 'red'), main = ' Est. Survival Function for Two Sexual Orientation Groups', ylab = 'Estimates Survival Function', xlab = 'Time')

legend('bottomleft', c('No homosexual activity', 'has homosexual activity'), col = c('blue', 'red'), lty = 1:2, bty = 'n')

Group by interaction between race and gender and plot the Kaplan-Meier curves.

aids.1 = aids %>%
  mutate(demo = case_when(
    gender == 0 & race == 0 ~ 1,
    gender == 0 & race == 1 ~ 2,
    gender == 1 & race == 0 ~ 3,
    gender == 1 & race == 1 ~ 4
  ))

plot(survfit(Surv(time, cid) ~ demo, data = aids.1, conf.type = 'none'), lty = 1:4, col = c('blue', 'red','orange', 'green'), main = ' Est. Survival Function for Different Demographic Groups', ylab = 'Estimates Survival Function', xlab = 'Time')

legend('bottomleft', c('White Female', 'Non-white Female', "White Male", "Non-white Male"), col = c('blue', 'red','orange', 'green'), lty = 1:4, bty = 'n')

Log-rank Test

# Log-Rank Test (W = 1)
survdiff(Surv(time, cid) ~ trt, data = aids, rho = 0)
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, rho = 0)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 1093      309      255      11.4      22.4
## trt=1 1046      212      266      11.0      22.4
## 
##  Chisq= 22.4  on 1 degrees of freedom, p= 2e-06
#Log-Rank Test with Peto-Peto Weight
survdiff(Surv(time, cid) ~ trt, data = aids, rho = 1)
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, rho = 1)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 1093      272      223      10.9      24.5
## trt=1 1046      182      231      10.5      24.5
## 
##  Chisq= 24.5  on 1 degrees of freedom, p= 8e-07
fit <- ten(Surv(time, cid) ~ trt, data = aids) #Save as a "ten" object
comp(fit, p = c(1, 0.5, 0), q = c(0, 0.5, 1)) #Calculates log-rank test statistics
##                          Q         Var       Z pNorm
## 1              -5.4019e+01  1.3009e+02 -4.7362     3
## n              -1.0111e+05  3.8641e+08 -5.1434     4
## sqrtN          -2.3378e+03  2.2096e+05 -4.9734     5
## S1             -4.9288e+01  9.9391e+01 -4.9439     8
## S2             -4.9262e+01  9.9273e+01 -4.9442     7
## FH_p=1_q=0     -4.9349e+01  9.9618e+01 -4.9443     6
## FH_p=0.5_q=0.5 -1.3704e+01  1.3781e+01 -3.6916     1
## FH_p=0_q=1     -4.6703e+00  2.9092e+00 -2.7381     2
##                   maxAbsZ        Var      Q pSupBr
## 1              5.7459e+01 1.3009e+02 5.0377      8
## n              1.0451e+05 3.8641e+08 5.3164      2
## sqrtN          2.4446e+03 2.2096e+05 5.2005      3
## S1             5.1823e+01 9.9391e+01 5.1982      6
## S2             5.1795e+01 9.9273e+01 5.1984      5
## FH_p=1_q=0     5.1887e+01 9.9618e+01 5.1986      4
## FH_p=0.5_q=0.5 1.5222e+01 1.3781e+01 4.1005      7
## FH_p=0_q=1     5.5820e+00 2.9092e+00 3.2727      1

Back to top

Cox’s model

Check Assumptions

Baseline cumulative hazard plot

fit <- basehaz(coxph(Surv(time, cid) ~ age + wtkg + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) +  cd80 + cd820 +
                       strata(factor(trt)), 
                data = aids, ties = 'breslow'), centered = TRUE)




# Create individual data frames
mono.treatment <- data.frame(
  "time" = fit$time[fit$strata == 0],
  "H1" = fit$hazard[fit$strata == 0])

combined.treatment <- data.frame(
  "time" = fit$time[fit$strata == 1],
  "H2" = fit$hazard[fit$strata == 1])


# List of data frames
df_list <- list(mono.treatment, combined.treatment)


# Merge all data frames by 'time'
merged_df <- reduce(df_list, full_join, by = "time") %>%
  arrange(time) %>%
  na.locf(na.rm = FALSE)  # Fill missing values

# # Create the plot
# plot(log(merged_df$H1) ~ merged_df$time, type = 's',
#      ylab = 'Log Cumulative Hazard', xlab = 'Time', 
#      main = 'Log H(t) vs. Time', col = "red", lty = 1, ylim = c(-3.5, 1))
# 
# # Add lines for other groups
# lines(log(merged_df$H2) ~ merged_df$time, type = "s", lty = 1, col = "blue")
# 
# # Add a legend
# legend("bottomright", c("mono treatment", "combined treatment"),
#        lty = 1, col = c("red", "blue", "green", "purple"), 
#        bty = "n", cex = .9)


#Convert to long format for ggplot
long_df <- merged_df %>%
  pivot_longer(cols = starts_with("H"), names_to = "Group", values_to = "Hazard")

# Replace group names for better legend readability
long_df$Group <- recode(long_df$Group,
                        "H1" = "Mono Treatment",
                        "H2" = "Combined Treatment")

# Plot using ggplot2
ggplot(long_df, aes(x = time, y = log(Hazard), color = Group)) +
  geom_step(size = 1.2, linetype = "solid") +  # Thicker lines for visibility
  labs(title = NULL,  # Remove title
       x = "Time",
       y = expression(log(H(t)))) +  # Use math notation for log(H(t))
  scale_color_manual(values = c("#D55E00", "#0072B2", "#009E73", "#CC79A7")) +  # Scientific color palette
  theme_classic(base_size = 16) +  # Use a clean, high-impact journal theme
  theme(
    axis.title = element_text(face = "bold", size = 18),  # Bold axis labels
    axis.text = element_text(size = 16),  # Large tick labels
    legend.title = element_blank(),  # No legend title
    legend.position = "bottom",  # Position legend for clarity
    legend.text = element_text(size = 16),  # Larger legend text
    panel.grid.major = element_blank(),  # Remove major grid lines
    panel.grid.minor = element_blank()   # Remove minor grid lines
  )

Baseline cumulative hazard plot

fit <- basehaz(coxph(Surv(time, cid) ~ age + wtkg + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) + 
                       factor(offtrt) +  cd80 + cd820 +
                       strata(factor(trt)), 
                data = aids, ties = 'breslow'), centered = TRUE)




# Create individual data frames
mono.treatment <- data.frame(
  "time" = fit$time[fit$strata == 0],
  "H1" = fit$hazard[fit$strata == 0])

combined.treatment <- data.frame(
  "time" = fit$time[fit$strata == 1],
  "H2" = fit$hazard[fit$strata == 1])


# List of data frames
df_list <- list(mono.treatment, combined.treatment)


# Merge all data frames by 'time'
merged_df <- reduce(df_list, full_join, by = "time") %>%
  arrange(time) %>%
  na.locf(na.rm = FALSE)  # Fill missing values

# # Create the plot
plot(log(merged_df$H1) ~ merged_df$time, type = 's',
     ylab = 'Log Cumulative Hazard', xlab = 'Time',
     main = 'Log H(t) vs. Time', col = "red", lty = 1, ylim = c(-8.5, 1))

# Add lines for other groups
lines(log(merged_df$H2) ~ merged_df$time, type = "s", lty = 1, col = "blue")

# Add a legend
legend("bottomright", c("mono treatment", "combined treatment"),
       lty = 1, col = c("red", "blue", "green", "purple"),
       bty = "n", cex = .9)

Anderson plot

Check for the proportional hazards assumption by Anderson plot

reptime <- function(l, t, range_of_time){
  x <- numeric(length(range_of_time))
  for(i in seq(range_of_time)){
    diff <- range_of_time[i] - t
    diff <- diff[diff >= 0]
    if(length(diff) > 0){
      x[i] <- l[which.min(diff)]
    }else{
      x[i] <- 0
    }
  }
  return(x)
}


fit3 <- basehaz(coxph(Surv(time, cid) ~ age + wtkg + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) +  cd80 + cd820 +
                       strata(factor(trt)), 
                      data = aids, ties = 'breslow'), centered = F)

H1 <- fit3$hazard[fit3$strata == 0]
H2 <- fit3$hazard[fit3$strata == 1]
t1 <- fit3$time[fit3$strata == 0]
t2 <- fit3$time[fit3$strata == 1]

H1 <- reptime(H1, t1, 1:2000)
H2 <- reptime(H2, t2, 1:2000)


par(mfrow = c(1, 2))  # Set up side-by-side plots

# First Plot
plot(log(merged_df$H1) ~ merged_df$time, type = 's',
     ylab = 'Log H(t)', 
     xlab = 'Time',
     main = element_blank(), 
     col = "red", lty = 1, ylim = c(-8.5, 1))

# Add lines for other groups
lines(log(merged_df$H2) ~ merged_df$time, type = "s", lty = 1, col = "blue")

# Add a legend
legend("topleft", c("mono treatment", "combined treatment"),
       lty = 1, col = c("red", "blue"),
       bty = "n", cex = .9)

# Add label (a)
mtext("(a)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

# Second Plot
plot(H2 ~ H1, main = element_blank(),
     ylab = 'combined treatment', xlab = 'mono treatment',
     type = 's', xlim = c(0, 1), ylim = c(0, 1))

# Add 45-degree reference line
# Fit a regression model through the origin
model <- lm(H2 ~ H1 - 1)  # '-1' removes the intercept

# Add the fitted regression line
abline(model, col = "red", lwd = 2, lty = 2)  # Blue regression line

# Add label for the reference line
legend("topleft", legend = "Reference line",
       col = "red", lty = 2, bty = "n")

# Add label (b)
mtext("(b)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

Schoenfeld residuals test

Check for the proportional hazards assumption using the Schoenfeld residuals test.

aids.fit <- coxph(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                   data = aids, ties = 'breslow')
summary(aids.fit)
## Call:
## coxph(formula = Surv(time, cid) ~ age + wtkg + factor(homo) + 
##     factor(drugs) + karnof + factor(oprior) + factor(z30) + preanti + 
##     factor(race) + factor(gender) + factor(str2) + strat + factor(symptom) + 
##     factor(offtrt) + cd80 + cd820 + factor(trt), data = aids, 
##     ties = "breslow")
## 
##   n= 2139, number of events= 521 
## 
##                        coef  exp(coef)   se(coef)      z Pr(>|z|)    
## age               0.0119096  1.0119809  0.0052362  2.274  0.02294 *  
## wtkg              0.0019335  1.0019353  0.0034800  0.556  0.57849    
## factor(homo)1    -0.0369980  0.9636781  0.1267317 -0.292  0.77033    
## factor(drugs)1   -0.4261507  0.6530180  0.1509507 -2.823  0.00476 ** 
## karnof           -0.0216934  0.9785402  0.0071612 -3.029  0.00245 ** 
## factor(oprior)1   0.1847325  1.2028966  0.2733062  0.676  0.49909    
## factor(z30)1      0.3100522  1.3634962  0.2193701  1.413  0.15755    
## preanti           0.0002707  1.0002707  0.0001562  1.733  0.08306 .  
## factor(race)1    -0.0126148  0.9874644  0.1101237 -0.115  0.90880    
## factor(gender)1   0.1052503  1.1109886  0.1632832  0.645  0.51919    
## factor(str2)1     0.1122370  1.1187780  0.2993203  0.375  0.70768    
## strat            -0.0775970  0.9253373  0.1598124 -0.486  0.62729    
## factor(symptom)1  0.5371378  1.7111023  0.1033624  5.197 2.03e-07 ***
## factor(offtrt)1   0.7671321  2.1535812  0.0911278  8.418  < 2e-16 ***
## cd80              0.0005666  1.0005667  0.0001335  4.244 2.20e-05 ***
## cd820            -0.0004446  0.9995555  0.0001535 -2.897  0.00377 ** 
## factor(trt)1     -0.4387301  0.6448548  0.0900759 -4.871 1.11e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## age                 1.0120     0.9882    1.0016    1.0224
## wtkg                1.0019     0.9981    0.9951    1.0088
## factor(homo)1       0.9637     1.0377    0.7517    1.2354
## factor(drugs)1      0.6530     1.5314    0.4858    0.8778
## karnof              0.9785     1.0219    0.9649    0.9924
## factor(oprior)1     1.2029     0.8313    0.7040    2.0553
## factor(z30)1        1.3635     0.7334    0.8870    2.0960
## preanti             1.0003     0.9997    1.0000    1.0006
## factor(race)1       0.9875     1.0127    0.7958    1.2253
## factor(gender)1     1.1110     0.9001    0.8067    1.5300
## factor(str2)1       1.1188     0.8938    0.6222    2.0115
## strat               0.9253     1.0807    0.6765    1.2657
## factor(symptom)1    1.7111     0.5844    1.3973    2.0954
## factor(offtrt)1     2.1536     0.4643    1.8013    2.5747
## cd80                1.0006     0.9994    1.0003    1.0008
## cd820               0.9996     1.0004    0.9993    0.9999
## factor(trt)1        0.6449     1.5507    0.5405    0.7694
## 
## Concordance= 0.677  (se = 0.012 )
## Likelihood ratio test= 200.9  on 17 df,   p=<2e-16
## Wald test            = 212.3  on 17 df,   p=<2e-16
## Score (logrank) test = 218.2  on 17 df,   p=<2e-16
(schoenfeld_test <- cox.zph(aids.fit))
##                    chisq df       p
## age              1.11449  1  0.2911
## wtkg             0.31819  1  0.5727
## factor(homo)     0.22532  1  0.6350
## factor(drugs)    0.46626  1  0.4947
## karnof           0.58677  1  0.4437
## factor(oprior)   0.49217  1  0.4830
## factor(z30)      0.01785  1  0.8937
## preanti          0.63321  1  0.4262
## factor(race)     0.00309  1  0.9557
## factor(gender)   0.00155  1  0.9686
## factor(str2)     0.20962  1  0.6471
## strat            0.13095  1  0.7174
## factor(symptom)  1.24924  1  0.2637
## factor(offtrt)  18.40673  1 1.8e-05
## cd80             2.11871  1  0.1455
## cd820            0.09158  1  0.7622
## factor(trt)      7.10497  1  0.0077
## GLOBAL          38.93975 17  0.0018
df = as.data.frame(round(schoenfeld_test$table, 2))
df$variable = c("age", "weight", "homo", "drugs", "karnof", "oprior", "z30", "preanti", "race", "gender", "str2", "strat", "symptom", "offtrt", "cd80", "cd820",  "treatment", "GLOBAL")
df = data.frame("Variable" = df$variable, "Chisq" = df$chisq, "DF" = df$df, "p-value"= df$p)
ft_ph = flextable(df)
ft_ph = set_caption(ft_ph, caption = "Schoenfeld Residuals Test on PH Assumption")
ft_ph <- ft_ph %>%
  bold(i = c(14, 17), bold = TRUE)
ft_ph
Schoenfeld Residuals Test on PH Assumption

Variable

Chisq

DF

p.value

age

1.11

1

0.29

weight

0.32

1

0.57

homo

0.23

1

0.64

drugs

0.47

1

0.49

karnof

0.59

1

0.44

oprior

0.49

1

0.48

z30

0.02

1

0.89

preanti

0.63

1

0.43

race

0.00

1

0.96

gender

0.00

1

0.97

str2

0.21

1

0.65

strat

0.13

1

0.72

symptom

1.25

1

0.26

offtrt

18.41

1

0.00

cd80

2.12

1

0.15

cd820

0.09

1

0.76

treatment

7.10

1

0.01

GLOBAL

38.94

17

0.00

Conclusion:

From the Schoenfeld Residuals Test, the variable treatment violates the PH assumption, so a Cox’s PH model is not appropriate.

Schoenfeld residuals plots

par(mfrow = c(1, 2))  # Set up side-by-side plots
plot(schoenfeld_test)

# check the Schoenfeld residuals plots for treatment
plot(schoenfeld_test, var = 17)
abline(h = coef(aids.fit)[17], col = "red", lwd = 2)

df1 = as.data.frame(round(summary(aids.fit)$coef, 2))
df1$variable = c("age", "weight", "homo", "drugs", "karnof", "oprior", "z30", "preanti", "race", "gender", "str2", "strat", "symptom", "offtrt", "cd80", "cd820",  "treatment")
df1 = df1[,c(6,1,2,3,4,5)]
ft_cox = flextable(df1)
ft_cox = set_caption(ft_cox, caption = "Cox's Model Coef Table")
ft_cox
Cox's Model Coef Table

variable

coef

exp(coef)

se(coef)

z

Pr(>|z|)

age

0.01

1.01

0.01

2.27

0.02

weight

0.00

1.00

0.00

0.56

0.58

homo

-0.04

0.96

0.13

-0.29

0.77

drugs

-0.43

0.65

0.15

-2.82

0.00

karnof

-0.02

0.98

0.01

-3.03

0.00

oprior

0.18

1.20

0.27

0.68

0.50

z30

0.31

1.36

0.22

1.41

0.16

preanti

0.00

1.00

0.00

1.73

0.08

race

-0.01

0.99

0.11

-0.11

0.91

gender

0.11

1.11

0.16

0.64

0.52

str2

0.11

1.12

0.30

0.37

0.71

strat

-0.08

0.93

0.16

-0.49

0.63

symptom

0.54

1.71

0.10

5.20

0.00

offtrt

0.77

2.15

0.09

8.42

0.00

cd80

0.00

1.00

0.00

4.24

0.00

cd820

0.00

1.00

0.00

-2.90

0.00

treatment

-0.44

0.64

0.09

-4.87

0.00

Conclusions:

  1. Sexual orientation and gender are non-significant predictors.

  2. A hazard ratio of 0.6449 means that individuals receiving a combination therapy has a 35.5% lower hazard of failure compared to individuals receiving a mono-therapy. So a combination therapy is associated with better survival.

Stratified Test

Gender

Stratified test of treatment effect while adjusting for gender

# Looking at females
survdiff(Surv(time, cid) ~ trt, data = aids, subset = (gender == 0))
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, subset = (gender == 
##     0))
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 191       47     36.8      2.82      5.63
## trt=1 177       27     37.2      2.79      5.63
## 
##  Chisq= 5.6  on 1 degrees of freedom, p= 0.02
# Looking at males
survdiff(Surv(time, cid) ~ trt, data = aids, subset = (gender == 1))
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, subset = (gender == 
##     1))
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 902      262      218      8.85      17.3
## trt=1 869      185      229      8.43      17.3
## 
##  Chisq= 17.3  on 1 degrees of freedom, p= 3e-05
# Stratified test
survdiff(Surv(time, cid) ~ trt + strata(gender), data = aids)
## Call:
## survdiff(formula = Surv(time, cid) ~ trt + strata(gender), data = aids)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 1093      309      255      11.5      22.5
## trt=1 1046      212      266      11.0      22.5
## 
##  Chisq= 22.5  on 1 degrees of freedom, p= 2e-06

Homo

Stratified test of treatment effect while adjusting for sexual orientation

# No homosexual activity 
survdiff(Surv(time, cid) ~ trt, data = aids, subset = (homo == 0))
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, subset = (homo == 
##     0))
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 373       94     74.4      5.14        10
## trt=1 352       59     78.6      4.87        10
## 
##  Chisq= 10  on 1 degrees of freedom, p= 0.002
# With homosexual activity 
survdiff(Surv(time, cid) ~ trt, data = aids, subset = (homo == 1))
## Call:
## survdiff(formula = Surv(time, cid) ~ trt, data = aids, subset = (homo == 
##     1))
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 720      215      181      6.52      12.8
## trt=1 694      153      187      6.29      12.8
## 
##  Chisq= 12.8  on 1 degrees of freedom, p= 3e-04
# Stratified test
survdiff(Surv(time, cid) ~ trt + strata(homo), data = aids)
## Call:
## survdiff(formula = Surv(time, cid) ~ trt + strata(homo), data = aids)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=0 1093      309      255      11.4      22.3
## trt=1 1046      212      266      10.9      22.3
## 
##  Chisq= 22.3  on 1 degrees of freedom, p= 2e-06

Variable Selection

AIC

# Perform stepwise selection to find the best model according to AIC
best.model.aic <- stepAIC(aids.fit, direction="both", 
                          k=2, trace=0)
summary(best.model.aic)
## Call:
## coxph(formula = Surv(time, cid) ~ age + factor(drugs) + karnof + 
##     factor(z30) + preanti + factor(symptom) + factor(offtrt) + 
##     cd80 + cd820 + factor(trt), data = aids, ties = "breslow")
## 
##   n= 2139, number of events= 521 
## 
##                        coef  exp(coef)   se(coef)      z Pr(>|z|)    
## age               0.0124647  1.0125427  0.0051020  2.443  0.01456 *  
## factor(drugs)1   -0.4385999  0.6449387  0.1473238 -2.977  0.00291 ** 
## karnof           -0.0221635  0.9780803  0.0070968 -3.123  0.00179 ** 
## factor(z30)1      0.3054998  1.3573032  0.1187689  2.572  0.01010 *  
## preanti           0.0002320  1.0002320  0.0001126  2.060  0.03940 *  
## factor(symptom)1  0.5362271  1.7095448  0.1016969  5.273 1.34e-07 ***
## factor(offtrt)1   0.7724633  2.1650930  0.0908059  8.507  < 2e-16 ***
## cd80              0.0005677  1.0005679  0.0001327  4.279 1.88e-05 ***
## cd820            -0.0004428  0.9995573  0.0001531 -2.891  0.00384 ** 
## factor(trt)1     -0.4383620  0.6450922  0.0898440 -4.879 1.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## age                 1.0125     0.9876    1.0025    1.0227
## factor(drugs)1      0.6449     1.5505    0.4832    0.8608
## karnof              0.9781     1.0224    0.9646    0.9918
## factor(z30)1        1.3573     0.7368    1.0754    1.7131
## preanti             1.0002     0.9998    1.0000    1.0005
## factor(symptom)1    1.7095     0.5850    1.4006    2.0866
## factor(offtrt)1     2.1651     0.4619    1.8121    2.5868
## cd80                1.0006     0.9994    1.0003    1.0008
## cd820               0.9996     1.0004    0.9993    0.9999
## factor(trt)1        0.6451     1.5502    0.5409    0.7693
## 
## Concordance= 0.675  (se = 0.012 )
## Likelihood ratio test= 199.2  on 10 df,   p=<2e-16
## Wald test            = 210.5  on 10 df,   p=<2e-16
## Score (logrank) test = 216.2  on 10 df,   p=<2e-16

BIC

n <- nrow(aids)
best.model.bic <- stepAIC(aids.fit, direction="both", 
                          k=log(n), trace=0)
best.model.aic
## Call:
## coxph(formula = Surv(time, cid) ~ age + factor(drugs) + karnof + 
##     factor(z30) + preanti + factor(symptom) + factor(offtrt) + 
##     cd80 + cd820 + factor(trt), data = aids, ties = "breslow")
## 
##                        coef  exp(coef)   se(coef)      z        p
## age               0.0124647  1.0125427  0.0051020  2.443  0.01456
## factor(drugs)1   -0.4385999  0.6449387  0.1473238 -2.977  0.00291
## karnof           -0.0221635  0.9780803  0.0070968 -3.123  0.00179
## factor(z30)1      0.3054998  1.3573032  0.1187689  2.572  0.01010
## preanti           0.0002320  1.0002320  0.0001126  2.060  0.03940
## factor(symptom)1  0.5362271  1.7095448  0.1016969  5.273 1.34e-07
## factor(offtrt)1   0.7724633  2.1650930  0.0908059  8.507  < 2e-16
## cd80              0.0005677  1.0005679  0.0001327  4.279 1.88e-05
## cd820            -0.0004428  0.9995573  0.0001531 -2.891  0.00384
## factor(trt)1     -0.4383620  0.6450922  0.0898440 -4.879 1.07e-06
## 
## Likelihood ratio test=199.2  on 10 df, p=< 2.2e-16
## n= 2139, number of events= 521

LASSO

set.seed(123)
X <- as.matrix(aids %>% dplyr::select(-c(cid, time)))
survtime <- aids$time
delta <- aids$cid
fit <- cv.ncvsurv(X, Surv(survtime, delta), penalty = 'lasso',  
                    nlambda = 10, nfolds = 10)
fit.lasso <- ncvsurv(X, Surv(survtime, delta), penalty = 'lasso', 
                    lambda = fit$lambda.min)
# coef(fit.lasso)  

# only keep/select the coef(fit.lasso)  with non-zero coefficients
coef(fit.lasso)[coef(fit.lasso) != 0]
##           trt           age          wtkg         drugs        karnof 
## -0.4191515317  0.0110270068  0.0014611220 -0.3924863347 -0.0208918143 
##        oprior           z30       preanti          race        gender 
##  0.1498586663  0.2919157343  0.0002205492 -0.0008918510  0.0664088885 
##          str2       symptom        offtrt          cd80         cd820 
##  0.0160700914  0.5180644577  0.7548336170  0.0005003386 -0.0003711303
names(coef(fit.lasso)[coef(fit.lasso) != 0])
##  [1] "trt"     "age"     "wtkg"    "drugs"   "karnof"  "oprior"  "z30"    
##  [8] "preanti" "race"    "gender"  "str2"    "symptom" "offtrt"  "cd80"   
## [15] "cd820"

CoxBAR

lambda <- 10
dataFit  <- createCyclopsData(Surv(survtime, delta) ~ X, modelType = "cox")
barPrior <- createBarPrior(penalty = log(lambda), 
                           initialRidgeVariance = 1/log(lambda)) 
# barPrior <- createBarPrior(penalty="bic")
fit.bar      <- fitCyclopsModel(dataFit, prior = barPrior)
# coef(fit.bar)

# only keep/select the coef(fit.bar)  with non-zero coefficients
coef(fit.bar)[coef(fit.bar) != 0]
##          Xtrt       Xkarnof      Xpreanti      Xsymptom       Xofftrt 
## -0.3686930786 -0.0166092513  0.0004120094  0.5135137517  0.6983361593

SCAD

fit <- cv.ncvsurv(X, Surv(survtime, delta), penalty = 'SCAD', 
                    nlambda = 10, nfolds = 50)
fit.scad <- ncvsurv(X, Surv(survtime, delta), penalty = 'SCAD', 
                    lambda = fit$lambda.min)
# coef(fit.scad)  

# only keep/select the coef(fit.scad)  with non-zero coefficients
coef(fit.scad)[coef(fit.scad) != 0]
##           trt           age         drugs        karnof        oprior 
## -0.4388024117  0.0108699363 -0.4132929945 -0.0222034021  0.0098459794 
##           z30       preanti       symptom        offtrt          cd80 
##  0.3037341392  0.0002358990  0.5374743537  0.7698823357  0.0005689677 
##         cd820 
## -0.0004436918

MCP

fit <- cv.ncvsurv(X, Surv(survtime, delta), penalty = 'MCP',  
                    nlambda = 30, nfolds = 10)
fit.MCP <- ncvsurv(X, Surv(survtime, delta), penalty = 'MCP', 
                    lambda = fit$lambda.min)
# coef(fit.MCP) 

# only keep/select the coef(fit.MCP)  with non-zero coefficients
coef(fit.MCP)[coef(fit.MCP) != 0]
##           trt           age          wtkg         drugs        karnof 
## -0.4383918255  0.0122983811  0.0003532726 -0.4333006431 -0.0219130789 
##        oprior           z30       preanti        gender       symptom 
##  0.0863840320  0.3114592643  0.0002280159  0.0195643206  0.5345946432 
##        offtrt          cd80         cd820 
##  0.7725227913  0.0005668817 -0.0004419579
# List of variables from different selection results
coef_AIC <- c("age", "drugs", "karnof", "z30", "preanti", "symptom",
              "offtrt", "cd80", "cd820", "trt")
    
coef_BIC <- c("age", "drugs", "karnof", "z30", "preanti", "symptom",
              "offtrt", "cd80", "cd820", "trt")


# Extract nonzero coefficient names from each model
coef_lasso <- names(coef(fit.lasso))[coef(fit.lasso) != 0]
coef_scad <- names(coef(fit.scad))[coef(fit.scad) != 0]
coef_mcp <- names(coef(fit.MCP))[coef(fit.MCP) != 0]

# Find intersection of selected variables across models
common_variables <- Reduce(intersect, list(coef_lasso, coef_scad, coef_mcp,
                                           coef_AIC, coef_BIC))
common_variables
##  [1] "trt"     "age"     "drugs"   "karnof"  "z30"     "preanti" "symptom"
##  [8] "offtrt"  "cd80"    "cd820"

Selected variables

The common variables selected by all methods are: trt, age, drugs, karnof, z30, preanti, symptom, offtrt, cd80, cd820.

Back to top

AFT model

Best distribution

Prepare the data

aids <- aids %>%
  mutate(across(c(homo, drugs, oprior, z30, race, gender, str2, symptom, offtrt, trt), as.factor))

Exponential

# AFT model with exponential distribution
# Fit AFT model with exponential distribution
aids.aft.exponential <- survreg(Surv(time, cid) ~ age + wtkg + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "exponential", x=TRUE, y=TRUE)

Weibull

# AFT model with Weibull distribution
aids.aft.weibull <- survreg(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "weibull")

Gaussian

# AFT model with Gaussian distribution
aids.aft.gaussian <- survreg(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "gaussian")

Logistic

# AFT model with logistic
aids.aft.logistic <- survreg(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "logistic")

Lognormal

# AFT model with Lognormal
aids.aft.lognormal <- survreg(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "lognormal")

Loglogistic

# AFT model with loglogistic
aids.aft.loglogistic <- survreg(Surv(time, cid) ~ age + wtkg  + factor(homo) + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) + factor(gender) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "loglogistic")

Goodness-of-fit

Log-likelihood

# select the model with the highest log likelihood
logLik_values <- c(
  exponential = logLik(aids.aft.exponential),
  weibull = logLik(aids.aft.weibull),
  gaussian = logLik(aids.aft.gaussian),
  logistic = logLik(aids.aft.logistic),
  lognormal = logLik(aids.aft.lognormal),
  loglogistic = logLik(aids.aft.loglogistic)
)

rank(logLik_values)
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##           3           4           2           1           6           5
print(round(logLik_values, 2))
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##    -4698.58    -4639.76    -4713.67    -4739.19    -4622.42    -4629.39
best_model <- names(which.max(logLik_values))
cat("The best AFT model based on log-likelihood is:", best_model)
## The best AFT model based on log-likelihood is: lognormal

Cox-Snell residual

#Get Cox-Snell Residuals
par(mfrow = c(3,2), mar = c(4.5, 4.5, 2, 1), oma = c(3, 3, 2, 1), 
    mgp = c(2.5, 1, 0), las = 1, cex.lab = 1.5, cex.axis = 1.2)

#Logistic
eta <- predict(aids.aft.logistic, type = 'lp')
sigma <- aids.aft.logistic$scale

# Define a range of time points for estimation
time_seq <- seq(min(aids$time), max(aids$time), length.out = 100)

r.log <- -log(1 - plogis((aids$time - eta) / sigma))

fit   <- survfit(Surv(r.log, aids$cid) ~ 1)
H.log <- cumsum(fit$n.event / fit$n.risk)

plot(H.log ~ fit$time, type = 'l', main = 'Logistic Regression',
     ylab = element_blank(), xlab = element_blank(), font.lab = 2, cex.lab = 1.5)
abline(0, 1, col='red',  lty=2)
mtext("(a)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)


#Gaussian
sigma  <- aids.aft.gaussian$scale
eta    <- -aids.aft.gaussian$linear.predictors / sigma

r.g <- -log(1 - pnorm((aids$time - aids.aft.gaussian$linear.predictors) / sigma))

fit   <- survfit(Surv(r.g, aids$cid) ~ 1)
H.g   <- cumsum(fit$n.event / fit$n.risk)

plot(H.g ~ fit$time, type = 'l', main = 'Gaussian Regression',
     ylab = element_blank(), xlab = element_blank())
abline(0, 1, col='red',  lty=2)
mtext("(b)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

#Exponential
sigma <- aids.aft.exponential$scale
eta   <- -aids.aft.exponential$linear.predictors/sigma

r.exp <- aids$time * exp(eta)

fit   <- survfit(Surv(r.exp, aids$cid) ~ 1)
H.exp <- cumsum(fit$n.event / fit$n.risk)

plot(H.exp ~ fit$time, type = 'l', main = 'Exponential Regression',
     ylab = 'Estimated Cumulative Hazard', xlab = element_blank(), font.lab = 2, cex.lab = 1.5)
abline(0, 1, col='red',  lty=2)
mtext("(c)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

#Weibull
sigma  <- aids.aft.weibull$scale
alpha  <- 1 / sigma
eta    <- -aids.aft.weibull$linear.predictors / sigma

r.wb <- aids$time^alpha * exp(eta)

fit   <- survfit(Surv(r.wb, aids$cid) ~ 1)
H.wb  <- cumsum(fit$n.event/fit$n.risk)

plot(H.wb ~ fit$time, type = 'l', main = 'Weibull Regression',
     ylab = element_blank(), xlab = element_blank(), font.lab = 2, cex.lab = 1.5)
abline(0, 1, col='red',  lty=2)
mtext("(d)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

#Log-Logistic
sigma  <- aids.aft.loglogistic$scale
alpha  <- 1 / sigma
eta    <- -aids.aft.loglogistic$linear.predictors / sigma

r.ll  <- -log(1/(1 + aids$time^alpha*exp(eta)))

fit   <- survfit(Surv(r.ll, aids$cid) ~ 1)
H.ll  <- cumsum(fit$n.event / fit$n.risk)

plot(H.ll ~ fit$time, type = 'l', main = 'Log-Logistic Regression',
     ylab = element_blank(), xlab = 'Cox-Snell Residual', font.lab = 2, cex.lab = 1.5)
abline(0, 1, col='red',  lty=2)
mtext("(e)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

#Log-Normal Distribution
eta    <- -aids.aft.lognormal$linear.predictors / aids.aft.lognormal$scale
r.ln   <- -log(1 - pnorm((log(aids$time) - aids.aft.lognormal$linear.predictors) / aids.aft.lognormal$scale))

fit   <- survfit(Surv(r.ln, aids$cid) ~ 1)
H.ln  <- cumsum(fit$n.event/fit$n.risk)

plot(H.ln ~ fit$time, type = 'l', main = 'Log-Normal Regression',
     ylab = element_blank(), xlab = 'Cox-Snell Residual', font.lab = 2, cex.lab = 1.5)
abline(0, 1, col='red',  lty = 2) 
mtext("(f)", side = 3, line = 0.5, adj = 0, cex = 1.2, font = 2)

AIC

# Extract AIC for each model
aic_values <- c(
  exponential = AIC(aids.aft.exponential),
  weibull = AIC(aids.aft.weibull),
  gaussian = AIC(aids.aft.gaussian),
  logistic = AIC(aids.aft.logistic),
  lognormal = AIC(aids.aft.lognormal),
  loglogistic = AIC(aids.aft.loglogistic)
)

# Print all AIC values
print(aic_values)
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##    9433.156    9317.510    9465.340    9516.372    9282.835    9296.777
# Identify the model with the lowest AIC
best_aic_model <- names(which.min(aic_values))
best_aic_value <- min(aic_values)

cat("The best AFT model based on AIC is:", best_aic_model, "with an AIC of", best_aic_value, "\n")
## The best AFT model based on AIC is: lognormal with an AIC of 9282.835

Therefore, the best AFT model is the Lognormal distribution based on the log-likelihood, Harrell’s C statistic, Uno’s C, and AIC.

Check Log-Linearity Assumption for AFT model

The AFT model assumes a log-linear relationship between survival time and covariates:

\[ log(T) = \mathbf{X} \mathbf{\beta} + \epsilon \]

Log(Time) vs. Covariates

Plot Log(Survival Time) vs. Continuous Covariates

  • If the relationship is linear, then the log-linearity assumption holds.
  • If there is a curved pattern, the log-normal AFT model might not be appropriate.
library(ggplot2)
library(dplyr)

aids %>%
  mutate(log_time = log(time)) %>%
  pivot_longer(cols = c(age, wtkg, cd80, cd820), names_to = "Variable", values_to = "Value") %>%
  ggplot(aes(x = Value, y = log_time)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE, color = "blue") +
  facet_wrap(~Variable, scales = "free_x") +
  theme_minimal() +
  labs(title = "Checking Log-Linearity: Log(Time) vs. Covariates",
       x = "Covariate Value",
       y = "Log(Survival Time)")

Residuals vs. Fitted Values

In an Accelerated Failure Time (AFT) model with a lognormal distribution, the theoretical residuals (ϵ) follow a normal distribution. So, we can check the normality assumption by plotting the residuals.

In order to check whether the assumed distribution of survival times fits the observed data sufficiently well, we can use the residuals of the model. However, because the residuals are calculated based on the observed event times, they will also be censored. Hence, to investigate whether they follow a particular distribution, we will need to employ a graphical procedure accounting for censoring. One way to achieve this is by using the Kaplan-Meier estimator of the residuals.

The definition of the residuals in the AFT model is given by:

\[ \frac{log(T_i) - \hat{\mu}_i}{\hat{\sigma}} \]

where \(T_i\) is the observed survival time, \(\hat{\mu}_i\) is the predicted survival time, and \(\hat{\sigma}\) is the estimated scale parameter of the AFT model. Therefore, construct the residuals based on their definition:

fitted_values <- aids.aft.lognormal$linear.predictors
resids <- (log(aids.aft.lognormal$y[, 1]) - fitted_values) / aids.aft.lognormal$scale

We then compute the Kaplan-Meier estimate of these residuals, and we plot it.

resKM <- survfit(Surv(resids, cid) ~ 1, data = aids)
plot(resKM, mark.time = FALSE, xlab = "AFT Residuals", ylab = "Survival Probability")
xx <- seq(min(resids), max(resids), length.out = 35)
yy <- 1-pnorm(xx)
lines(xx, yy, col = "red", lwd = 2)
legend("bottomleft", c("KM estimate", "95% CI for KM estimate", 
    "Survival function of standard normal"),
    lty = c(1,2,1), col = c(1,1,2), bty = "n")

Interactions

Explore whether lognormal AFT model has interactions between treatment and gender/sexual orientation

aids.aft.lognormal.interactions <- survreg(Surv(time, cid) ~ age + wtkg + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(race) +
                       factor(str2) + strat + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt) * factor(homo) + factor(trt) * factor(gender),
                       data = aids, dist = "lognormal")
summary(aids.aft.lognormal.interactions)
## 
## Call:
## survreg(formula = Surv(time, cid) ~ age + wtkg + factor(drugs) + 
##     karnof + factor(oprior) + factor(z30) + preanti + factor(race) + 
##     factor(str2) + strat + factor(symptom) + factor(offtrt) + 
##     cd80 + cd820 + factor(trt) * factor(homo) + factor(trt) * 
##     factor(gender), data = aids, dist = "lognormal")
##                                  Value Std. Error     z       p
## (Intercept)                   6.65e+00   5.62e-01 11.82 < 2e-16
## age                          -6.03e-03   3.55e-03 -1.70 0.08933
## wtkg                         -5.14e-04   2.40e-03 -0.21 0.83036
## factor(drugs)1                2.66e-01   9.87e-02  2.70 0.00695
## karnof                        1.68e-02   5.09e-03  3.31 0.00092
## factor(oprior)1              -2.81e-01   2.01e-01 -1.40 0.16279
## factor(z30)1                 -2.20e-01   1.54e-01 -1.43 0.15242
## preanti                      -1.95e-04   1.15e-04 -1.69 0.09096
## factor(race)1                 5.23e-03   7.36e-02  0.07 0.94335
## factor(str2)1                -5.26e-02   2.06e-01 -0.26 0.79864
## strat                         5.19e-02   1.15e-01  0.45 0.65059
## factor(symptom)1             -3.99e-01   7.61e-02 -5.25 1.6e-07
## factor(offtrt)1              -5.68e-01   6.36e-02 -8.93 < 2e-16
## cd80                         -4.30e-04   9.49e-05 -4.53 6.0e-06
## cd820                         3.61e-04   1.05e-04  3.42 0.00062
## factor(trt)1                  3.40e-01   1.57e-01  2.16 0.03042
## factor(homo)1                 7.46e-02   1.13e-01  0.66 0.50753
## factor(gender)1              -1.46e-01   1.42e-01 -1.02 0.30709
## factor(trt)1:factor(homo)1   -1.46e-01   1.67e-01 -0.88 0.38119
## factor(trt)1:factor(gender)1  1.36e-01   2.15e-01  0.63 0.52663
## Log(scale)                    4.20e-02   3.52e-02  1.19 0.23283
## 
## Scale= 1.04 
## 
## Log Normal distribution
## Loglik(model)= -4622   Loglik(intercept only)= -4736.4
##  Chisq= 228.69 on 19 degrees of freedom, p= 6.2e-38 
## Number of Newton-Raphson Iterations: 4 
## n= 2139

Conclusion:

The interaction terms are not significant, suggesting that the treatment effect is consistent across gender and sexual orientation groups.

Variable Selection

AIC

# Variable selection using stepwise AIC
best.model.aic <- stepAIC(aids.aft.lognormal, direction="both", 
                          k=2, trace=0)
summary(best.model.aic)
## 
## Call:
## survreg(formula = Surv(time, cid) ~ age + factor(drugs) + karnof + 
##     factor(oprior) + factor(z30) + preanti + factor(symptom) + 
##     factor(offtrt) + cd80 + cd820 + factor(trt), data = aids, 
##     dist = "lognormal")
##                      Value Std. Error     z       p
## (Intercept)       6.60e+00   5.19e-01 12.71 < 2e-16
## age              -6.30e-03   3.46e-03 -1.82 0.06836
## factor(drugs)1    2.73e-01   9.68e-02  2.82 0.00475
## karnof            1.69e-02   5.06e-03  3.34 0.00083
## factor(oprior)1  -2.72e-01   1.85e-01 -1.47 0.14048
## factor(z30)1     -2.02e-01   8.17e-02 -2.47 0.01339
## preanti          -1.63e-04   8.16e-05 -1.99 0.04637
## factor(symptom)1 -4.01e-01   7.54e-02 -5.32 1.1e-07
## factor(offtrt)1  -5.70e-01   6.34e-02 -8.99 < 2e-16
## cd80             -4.32e-04   9.46e-05 -4.57 4.8e-06
## cd820             3.59e-04   1.05e-04  3.41 0.00065
## factor(trt)1      3.53e-01   6.16e-02  5.73 1.0e-08
## Log(scale)        4.30e-02   3.52e-02  1.22 0.22215
## 
## Scale= 1.04 
## 
## Log Normal distribution
## Loglik(model)= -4623   Loglik(intercept only)= -4736.4
##  Chisq= 226.64 on 11 degrees of freedom, p= 2.1e-42 
## Number of Newton-Raphson Iterations: 4 
## n= 2139

BIC

n <- nrow(aids)
best.model.bic <- stepAIC(aids.aft.lognormal, direction="both", 
                          k=log(n), trace=0)
best.model.bic
## Call:
## survreg(formula = Surv(time, cid) ~ karnof + preanti + factor(symptom) + 
##     factor(offtrt) + cd80 + cd820 + factor(trt), data = aids, 
##     dist = "lognormal")
## 
## Coefficients:
##      (Intercept)           karnof          preanti factor(symptom)1 
##     6.2159930381     0.0181270251    -0.0003154474    -0.4073723504 
##  factor(offtrt)1             cd80            cd820     factor(trt)1 
##    -0.5514171184    -0.0004352931     0.0003698285     0.3548002879 
## 
## Scale= 1.048921 
## 
## Loglik(model)= -4632.2   Loglik(intercept only)= -4736.4
##  Chisq= 208.4 on 7 degrees of freedom, p= <2e-16 
## n= 2139

LASSO

# Load required packages
library(survival)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-8
library(dplyr)

# Step 1: Prepare Data - Convert categorical variables into dummy variables
aids_processed <- model.matrix(Surv(time, cid) ~ age + wtkg + factor(homo) + 
                              factor(drugs) + karnof + factor(oprior) + factor(z30) +
                              preanti + factor(race) + factor(gender) +
                              factor(str2) + strat + factor(symptom) +
                              factor(offtrt) + cd80 + cd820 +
                              factor(trt), 
                              data = aids)[,-1]  # Remove intercept column

# Response variable (log of survival time, as AFT assumes log-linear form)
y <- log(aids$time)  

# Censoring indicator
censoring <- aids$cid  

# Step 2: Fit LASSO model
lasso_fit <- glmnet(aids_processed, y, family = "gaussian", alpha = 1)  # LASSO (alpha=1)

# Step 3: Cross-validation to select optimal lambda (penalty parameter)
cv_lasso <- cv.glmnet(aids_processed, y, family = "gaussian", alpha = 1)

# Step 4: Identify optimal lambda
best_lambda <- cv_lasso$lambda.min  

# Step 5: Extract selected variables (non-zero coefficients)
lasso_coefficients <- coef(cv_lasso, s = best_lambda)  # Get coefficients at optimal lambda
selected_variables <- rownames(lasso_coefficients)[lasso_coefficients[,1] != 0]  # Remove zero coefficients

# Display selected variables
print(selected_variables)
##  [1] "(Intercept)"      "factor(homo)1"    "factor(drugs)1"   "karnof"          
##  [5] "factor(oprior)1"  "factor(z30)1"     "preanti"          "factor(race)1"   
##  [9] "factor(symptom)1" "factor(offtrt)1"  "cd80"             "cd820"           
## [13] "factor(trt)1"

Selected variables

The common variables selected by all methods are: trt, age, drugs, karnof, oprior, z30, preanti, symptom, offtrt, cd80, cd820.

best.aft.model <- survreg(Surv(time, cid) ~ age + factor(homo) + factor(race) +
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                          data = aids, dist = "lognormal")

summary(best.aft.model)
## 
## Call:
## survreg(formula = Surv(time, cid) ~ age + factor(homo) + factor(race) + 
##     factor(drugs) + karnof + factor(oprior) + factor(z30) + preanti + 
##     factor(symptom) + factor(offtrt) + cd80 + cd820 + factor(trt), 
##     data = aids, dist = "lognormal")
##                      Value Std. Error     z       p
## (Intercept)       6.62e+00   5.24e-01 12.64 < 2e-16
## age              -5.95e-03   3.51e-03 -1.69 0.09032
## factor(homo)1    -3.24e-02   7.12e-02 -0.46 0.64895
## factor(race)1     1.47e-02   7.27e-02  0.20 0.84025
## factor(drugs)1    2.64e-01   9.85e-02  2.68 0.00743
## karnof            1.67e-02   5.07e-03  3.30 0.00098
## factor(oprior)1  -2.72e-01   1.85e-01 -1.47 0.14083
## factor(z30)1     -2.05e-01   8.19e-02 -2.50 0.01240
## preanti          -1.60e-04   8.20e-05 -1.95 0.05144
## factor(symptom)1 -3.95e-01   7.59e-02 -5.21 1.9e-07
## factor(offtrt)1  -5.71e-01   6.35e-02 -8.99 < 2e-16
## cd80             -4.31e-04   9.47e-05 -4.55 5.4e-06
## cd820             3.60e-04   1.05e-04  3.41 0.00064
## factor(trt)1      3.54e-01   6.17e-02  5.74 9.5e-09
## Log(scale)        4.31e-02   3.52e-02  1.22 0.22064
## 
## Scale= 1.04 
## 
## Log Normal distribution
## Loglik(model)= -4622.9   Loglik(intercept only)= -4736.4
##  Chisq= 226.97 on 13 degrees of freedom, p= 3.8e-41 
## Number of Newton-Raphson Iterations: 4 
## n= 2139
round(best.aft.model$coefficients, 3)
##      (Intercept)              age    factor(homo)1    factor(race)1 
##            6.618           -0.006           -0.032            0.015 
##   factor(drugs)1           karnof  factor(oprior)1     factor(z30)1 
##            0.264            0.017           -0.272           -0.205 
##          preanti factor(symptom)1  factor(offtrt)1             cd80 
##            0.000           -0.395           -0.571            0.000 
##            cd820     factor(trt)1 
##            0.000            0.354
summary(best.aft.model)$table[, "Std. Error"]
##      (Intercept)              age    factor(homo)1    factor(race)1 
##     5.236486e-01     3.514783e-03     7.117672e-02     7.273944e-02 
##   factor(drugs)1           karnof  factor(oprior)1     factor(z30)1 
##     9.851679e-02     5.073129e-03     1.849545e-01     8.187272e-02 
##          preanti factor(symptom)1  factor(offtrt)1             cd80 
##     8.198066e-05     7.587983e-02     6.351147e-02     9.472054e-05 
##            cd820     factor(trt)1       Log(scale) 
##     1.054477e-04     6.165573e-02     3.520680e-02
round(exp(best.aft.model$coefficients), 3)
##      (Intercept)              age    factor(homo)1    factor(race)1 
##          748.362            0.994            0.968            1.015 
##   factor(drugs)1           karnof  factor(oprior)1     factor(z30)1 
##            1.302            1.017            0.762            0.815 
##          preanti factor(symptom)1  factor(offtrt)1             cd80 
##            1.000            0.673            0.565            1.000 
##            cd820     factor(trt)1 
##            1.000            1.425
round(exp(confint(best.aft.model)), 3)
##                    2.5 %   97.5 %
## (Intercept)      268.152 2088.537
## age                0.987    1.001
## factor(homo)1      0.842    1.113
## factor(race)1      0.880    1.170
## factor(drugs)1     1.073    1.579
## karnof             1.007    1.027
## factor(oprior)1    0.530    1.094
## factor(z30)1       0.694    0.957
## preanti            1.000    1.000
## factor(symptom)1   0.580    0.781
## factor(offtrt)1    0.499    0.640
## cd80               0.999    1.000
## cd820              1.000    1.001
## factor(trt)1       1.262    1.608
best.aft.model |>
  tbl_regression(intercept = TRUE,
                 digits = list("estimate" ~ 3))
## Warning: The `exponentiate` argument is not supported in the `tidy()` method
## for `survreg` objects and will be ignored.
Characteristic Beta 95% CI1 p-value
(Intercept) 6.6 5.6, 7.6 <0.001
age -0.01 -0.01, 0.00 0.090
factor(homo)


    0
    1 -0.03 -0.17, 0.11 0.6
factor(race)


    0
    1 0.01 -0.13, 0.16 0.8
factor(drugs)


    0
    1 0.26 0.07, 0.46 0.007
karnof 0.02 0.01, 0.03 <0.001
factor(oprior)


    0
    1 -0.27 -0.63, 0.09 0.14
factor(z30)


    0
    1 -0.20 -0.37, -0.04 0.012
preanti 0.00 0.00, 0.00 0.051
factor(symptom)


    0
    1 -0.40 -0.54, -0.25 <0.001
factor(offtrt)


    0
    1 -0.57 -0.70, -0.45 <0.001
cd80 0.00 0.00, 0.00 <0.001
cd820 0.00 0.00, 0.00 <0.001
factor(trt)


    0
    1 0.35 0.23, 0.47 <0.001
1 CI = Confidence Interval

Prediction Diagnostic

The common variables selected by all methods are: trt, age, drugs, karnof, oprior, z30, preanti, symptom, offtrt, cd80, cd820.

Psudo R^2

Cox’s

aids.coxph <- coxph(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                   data = aids, ties = 'breslow', x=TRUE, y=TRUE)

Exponential

# AFT model with exponential distribution
# Fit AFT model with exponential distribution
aids.aft.exponential <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "exponential", x=TRUE, y=TRUE)

Weibull

# AFT model with Weibull distribution
aids.aft.weibull <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "weibull", x=TRUE, y=TRUE)

Gaussian

# AFT model with Gaussian distribution
aids.aft.gaussian <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "gaussian", x=TRUE, y=TRUE)

Logistic

# AFT model with logistic
aids.aft.logistic <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "logistic", x=TRUE, y=TRUE)

Lognormal

# AFT model with Lognormal
aids.aft.lognormal <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "lognormal", x=TRUE, y=TRUE)

Loglogistic

# AFT model with loglogistic
aids.aft.loglogistic <- survreg(Surv(time, cid) ~ age + 
                       factor(drugs) + karnof + factor(oprior) + factor(z30) +
                       preanti + factor(symptom) +
                       factor(offtrt) + cd80 + cd820 +
                       factor(trt),
                       data = aids, dist = "loglogistic", x=TRUE, y=TRUE)
# Psedo R2 for each model
PseudoR2 <- c(
  coxph = pam.coxph(aids.coxph)$R.squared,
  exponential = pam.coxph(aids.aft.exponential)$R.squared,
  weibull = pam.coxph(aids.aft.weibull)$R.squared,
  gaussian = pam.coxph(aids.aft.gaussian)$R.squared,
  logistic = pam.coxph(aids.aft.logistic)$R.squared,
  lognormal = pam.coxph(aids.aft.lognormal)$R.squared,
  loglogistic = pam.coxph(aids.aft.loglogistic)$R.squared
)
## Warning in weight.km * (calibrate.fitted - sum(weight.km * y))^2: longer object
## length is not a multiple of shorter object length
## Warning in y - calibrate.fitted: longer object length is not a multiple of
## shorter object length
## Warning in weight.km * (calibrate.fitted - sum(weight.km * y))^2: longer object
## length is not a multiple of shorter object length
## Warning in y - calibrate.fitted: longer object length is not a multiple of
## shorter object length
# Print all Pseudo R2 values
print(PseudoR2)
##       coxph exponential     weibull    gaussian    logistic   lognormal 
##    "0.0909"    "0.0725"    "0.0873"        "NA"        "NA"    "0.0885" 
## loglogistic 
##    "0.0883"
# Identify the model with the lowest Pseudo R2
best_pseudoR2_model <- names(which.min(PseudoR2))
## Warning in which.min(PseudoR2): NAs introduced by coercion
best_pseudoR2_value <- min(PseudoR2)

cat("The model with the lowest Pseudo R2 is", best_pseudoR2_model, "with a value of", best_pseudoR2_value, "\n")
## The model with the lowest Pseudo R2 is exponential with a value of 0.0725
# L_squared for each model
L_squared <- c(
  coxph = pam.coxph(aids.coxph)$L.squared,
  exponential = pam.coxph(aids.aft.exponential)$L.squared,
  weibull = pam.coxph(aids.aft.weibull)$L.squared,
  gaussian = pam.coxph(aids.aft.gaussian)$L.squared,
  logistic = pam.coxph(aids.aft.logistic)$L.squared,
  lognormal = pam.coxph(aids.aft.lognormal)$L.squared,
  loglogistic = pam.coxph(aids.aft.loglogistic)$L.squared
)
## Warning in weight.km * (calibrate.fitted - sum(weight.km * y))^2: longer object
## length is not a multiple of shorter object length
## Warning in y - calibrate.fitted: longer object length is not a multiple of
## shorter object length
## Warning in weight.km * (calibrate.fitted - sum(weight.km * y))^2: longer object
## length is not a multiple of shorter object length
## Warning in y - calibrate.fitted: longer object length is not a multiple of
## shorter object length
# Print all L_squared values
print(L_squared)
##       coxph exponential     weibull    gaussian    logistic   lognormal 
##    "0.2840"    "0.1996"    "0.2065"        "NA"        "NA"    "0.2025" 
## loglogistic 
##    "0.2041"
# Identify the model with the lowest L_squared
best_L_squared_model <- names(which.min(L_squared))
## Warning in which.min(L_squared): NAs introduced by coercion
best_L_squared_value <- min(L_squared)

cat("The model with the lowest L_squared is", best_L_squared_model, "with a value of", best_L_squared_value)
## The model with the lowest L_squared is exponential with a value of 0.1996

Harrell’s C statistic

# Extract concordance index for each model
concordance_values <- c(
  exponential = concordance(aids.aft.exponential)$concordance,
  weibull = concordance(aids.aft.weibull)$concordance,
  gaussian = concordance(aids.aft.gaussian)$concordance,
  logistic = concordance(aids.aft.logistic)$concordance,
  lognormal = concordance(aids.aft.lognormal)$concordance,
  loglogistic = concordance(aids.aft.loglogistic)$concordance
)

# Print all concordance values
print(round(concordance_values, 4))
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##      0.6758      0.6760      0.6770      0.6765      0.6773      0.6769
print(rank(concordance_values))
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##           1           2           5           3           6           4
# Identify the model with the highest concordance
best_concordance_model <- names(which.max(concordance_values))
best_concordance_value <- max(concordance_values)

cat("The best AFT model based on concordance index is:", best_concordance_model, "with a C-index of", best_concordance_value, "\n")
## The best AFT model based on concordance index is: lognormal with a C-index of 0.6773082

Uno’s C

# Extract Uno's C index for each model
uno_values <- c(
  exponential = concordance(aids.aft.exponential, timewt="n/G2")$concordance,
  weibull = concordance(aids.aft.weibull, timewt="n/G2")$concordance,
  gaussian = concordance(aids.aft.gaussian, timewt="n/G2")$concordance,
  logistic = concordance(aids.aft.logistic, timewt="n/G2")$concordance,
  lognormal = concordance(aids.aft.lognormal, timewt="n/G2")$concordance,
  loglogistic = concordance(aids.aft.loglogistic, timewt="n/G2")$concordance
)

# Print all Uno's C values
print(round(uno_values, 4))
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##      0.6661      0.6662      0.6669      0.6666      0.6675      0.6671
print(rank(uno_values))
## exponential     weibull    gaussian    logistic   lognormal loglogistic 
##           1           2           4           3           6           5
# Identify the model with the highest Uno's C
best_uno_model <- names(which.max(uno_values))
best_uno_value <- max(uno_values)

cat("The best AFT model based on Uno's C index is:", best_uno_model, "with a C-index of", best_uno_value, "\n")
## The best AFT model based on Uno's C index is: lognormal with a C-index of 0.6674623

Back to top